{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Softmax exercise\n",
    "\n",
    "*Complete and hand in this completed worksheet (including its outputs and any supporting code outside of the worksheet) with your assignment submission. For more details see the [assignments page](http://vision.stanford.edu/teaching/cs231n/assignments.html) on the course website.*\n",
    "\n",
    "This exercise is analogous to the SVM exercise. You will:\n",
    "\n",
    "- implement a fully-vectorized **loss function** for the Softmax classifier\n",
    "- implement the fully-vectorized expression for its **analytic gradient**\n",
    "- **check your implementation** with numerical gradient\n",
    "- use a validation set to **tune the learning rate and regularization** strength\n",
    "- **optimize** the loss function with **SGD**\n",
    "- **visualize** the final learned weights\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "from cs231n.data_utils import load_CIFAR10\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from __future__ import print_function\n",
    "\n",
    "%matplotlib inline\n",
    "plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots\n",
    "plt.rcParams['image.interpolation'] = 'nearest'\n",
    "plt.rcParams['image.cmap'] = 'gray'\n",
    "\n",
    "# for auto-reloading extenrnal modules\n",
    "# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train data shape:  (49000, 3073)\n",
      "Train labels shape:  (49000,)\n",
      "Validation data shape:  (1000, 3073)\n",
      "Validation labels shape:  (1000,)\n",
      "Test data shape:  (1000, 3073)\n",
      "Test labels shape:  (1000,)\n",
      "dev data shape:  (500, 3073)\n",
      "dev labels shape:  (500,)\n"
     ]
    }
   ],
   "source": [
    "def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):\n",
    "    \"\"\"\n",
    "    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare\n",
    "    it for the linear classifier. These are the same steps as we used for the\n",
    "    SVM, but condensed to a single function.  \n",
    "    \"\"\"\n",
    "    # Load the raw CIFAR-10 data\n",
    "    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'\n",
    "    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)\n",
    "    \n",
    "    # subsample the data\n",
    "    mask = list(range(num_training, num_training + num_validation))\n",
    "    X_val = X_train[mask]\n",
    "    y_val = y_train[mask]\n",
    "    mask = list(range(num_training))\n",
    "    X_train = X_train[mask]\n",
    "    y_train = y_train[mask]\n",
    "    mask = list(range(num_test))\n",
    "    X_test = X_test[mask]\n",
    "    y_test = y_test[mask]\n",
    "    mask = np.random.choice(num_training, num_dev, replace=False)\n",
    "    X_dev = X_train[mask]\n",
    "    y_dev = y_train[mask]\n",
    "    \n",
    "    # Preprocessing: reshape the image data into rows\n",
    "    X_train = np.reshape(X_train, (X_train.shape[0], -1))\n",
    "    X_val = np.reshape(X_val, (X_val.shape[0], -1))\n",
    "    X_test = np.reshape(X_test, (X_test.shape[0], -1))\n",
    "    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))\n",
    "    \n",
    "    # Normalize the data: subtract the mean image\n",
    "    mean_image = np.mean(X_train, axis = 0)\n",
    "    X_train -= mean_image\n",
    "    X_val -= mean_image\n",
    "    X_test -= mean_image\n",
    "    X_dev -= mean_image\n",
    "    \n",
    "    # add bias dimension and transform into columns\n",
    "    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])\n",
    "    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])\n",
    "    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])\n",
    "    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])\n",
    "    \n",
    "    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev\n",
    "\n",
    "\n",
    "# Invoke the above function to get our data.\n",
    "X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()\n",
    "print('Train data shape: ', X_train.shape)\n",
    "print('Train labels shape: ', y_train.shape)\n",
    "print('Validation data shape: ', X_val.shape)\n",
    "print('Validation labels shape: ', y_val.shape)\n",
    "print('Test data shape: ', X_test.shape)\n",
    "print('Test labels shape: ', y_test.shape)\n",
    "print('dev data shape: ', X_dev.shape)\n",
    "print('dev labels shape: ', y_dev.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Softmax Classifier\n",
    "\n",
    "Your code for this section will all be written inside **cs231n/classifiers/softmax.py**. \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loss: 2.343873\n",
      "sanity check: 2.302585\n"
     ]
    }
   ],
   "source": [
    "# First implement the naive softmax loss function with nested loops.\n",
    "# Open the file cs231n/classifiers/softmax.py and implement the\n",
    "# softmax_loss_naive function.\n",
    "\n",
    "from cs231n.classifiers.softmax import softmax_loss_naive\n",
    "import time\n",
    "\n",
    "# Generate a random softmax weight matrix and use it to compute the loss.\n",
    "W = np.random.randn(3073, 10) * 0.0001\n",
    "loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)\n",
    "\n",
    "# As a rough sanity check, our loss should be something close to -log(0.1).\n",
    "print('loss: %f' % loss)\n",
    "print('sanity check: %f' % (-np.log(0.1)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inline Question 1:\n",
    "Why do we expect our loss to be close to -log(0.1)? Explain briefly.**\n",
    "\n",
    "**Your answer:** *Fill this in*\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "numerical: 0.245895 analytic: 0.245895, relative error: 1.976390e-07\n",
      "numerical: 1.837746 analytic: 1.837746, relative error: 1.041965e-08\n",
      "numerical: 0.292038 analytic: 0.292038, relative error: 1.350460e-07\n",
      "numerical: -2.560516 analytic: -2.560516, relative error: 7.767839e-09\n",
      "numerical: -0.349402 analytic: -0.349402, relative error: 1.011910e-07\n",
      "numerical: 0.295307 analytic: 0.295307, relative error: 1.739994e-08\n",
      "numerical: -4.315507 analytic: -4.315507, relative error: 1.039702e-08\n",
      "numerical: -1.513167 analytic: -1.513167, relative error: 5.124587e-08\n",
      "numerical: 2.095000 analytic: 2.095000, relative error: 2.182314e-08\n",
      "numerical: -2.186169 analytic: -2.186169, relative error: 8.571612e-09\n",
      "numerical: 1.249589 analytic: 1.257728, relative error: 3.246239e-03\n",
      "numerical: 0.192919 analytic: 0.196214, relative error: 8.467215e-03\n",
      "numerical: 0.415391 analytic: 0.417242, relative error: 2.222936e-03\n",
      "numerical: 4.051698 analytic: 4.052087, relative error: 4.800477e-05\n",
      "numerical: -0.556046 analytic: -0.564138, relative error: 7.223902e-03\n",
      "numerical: 0.928687 analytic: 0.930263, relative error: 8.478049e-04\n",
      "numerical: -3.851208 analytic: -3.851246, relative error: 4.936651e-06\n",
      "numerical: -0.674089 analytic: -0.679658, relative error: 4.113486e-03\n",
      "numerical: 0.801998 analytic: 0.799510, relative error: 1.553570e-03\n",
      "numerical: -1.688288 analytic: -1.691526, relative error: 9.581664e-04\n"
     ]
    }
   ],
   "source": [
    "# Complete the implementation of softmax_loss_naive and implement a (naive)\n",
    "# version of the gradient that uses nested loops.\n",
    "loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)\n",
    "\n",
    "# As we did for the SVM, use numeric gradient checking as a debugging tool.\n",
    "# The numeric gradient should be close to the analytic gradient.\n",
    "from cs231n.gradient_check import grad_check_sparse\n",
    "f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]\n",
    "grad_numerical = grad_check_sparse(f, W, grad, 10)\n",
    "\n",
    "# similar to SVM case, do another gradient check with regularization\n",
    "loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)\n",
    "f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]\n",
    "grad_numerical = grad_check_sparse(f, W, grad, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Now that we have a naive implementation of the softmax loss function and its gradient,\n",
    "# implement a vectorized version in softmax_loss_vectorized.\n",
    "# The two versions should compute the same results, but the vectorized version should be\n",
    "# much faster.\n",
    "tic = time.time()\n",
    "loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)\n",
    "toc = time.time()\n",
    "print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))\n",
    "\n",
    "from cs231n.classifiers.softmax import softmax_loss_vectorized\n",
    "tic = time.time()\n",
    "loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.000005)\n",
    "toc = time.time()\n",
    "print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))\n",
    "\n",
    "# As we did for the SVM, we use the Frobenius norm to compare the two versions\n",
    "# of the gradient.\n",
    "grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')\n",
    "print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))\n",
    "print('Gradient difference: %f' % grad_difference)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Use the validation set to tune hyperparameters (regularization strength and\n",
    "# learning rate). You should experiment with different ranges for the learning\n",
    "# rates and regularization strengths; if you are careful you should be able to\n",
    "# get a classification accuracy of over 0.35 on the validation set.\n",
    "from cs231n.classifiers import Softmax\n",
    "results = {}\n",
    "best_val = -1\n",
    "best_softmax = None\n",
    "learning_rates = [1e-7, 5e-7]\n",
    "regularization_strengths = [2.5e4, 5e4]\n",
    "\n",
    "################################################################################\n",
    "# TODO:                                                                        #\n",
    "# Use the validation set to set the learning rate and regularization strength. #\n",
    "# This should be identical to the validation that you did for the SVM; save    #\n",
    "# the best trained softmax classifer in best_softmax.                          #\n",
    "################################################################################\n",
    "for li in learning_rates:\n",
    "    for rsi in regularization_strengths:\n",
    "        svm = LinearSVM()\n",
    "        loss_hist = svm.train(X_train, y_train, learning_rate=li, reg=rsi,\n",
    "                          num_iters=1500, verbose=True)\n",
    "        y_train_pred = svm.predict(X_train)\n",
    "        y_train_acc = np.mean(y_train == y_train_pred)\n",
    "        y_val_pred = svm.predict(X_val)\n",
    "        y_val_acc = np.mean(y_val == y_val_pred)\n",
    "        print('learning rate is %.10f and regularization strength is %.10f'% (li,rsi) )\n",
    "        print('validation accuracy: %f \\n' % (y_val_acc,))\n",
    "        results[(li,rsi)]=(y_train_acc,y_val_acc)\n",
    "        if best_val<y_val_acc:\n",
    "            best_svm=svm\n",
    "            best_val=y_val_acc\n",
    "################################################################################\n",
    "#                              END OF YOUR CODE                                #\n",
    "################################################################################\n",
    "    \n",
    "# Print out results.\n",
    "for lr, reg in sorted(results):\n",
    "    train_accuracy, val_accuracy = results[(lr, reg)]\n",
    "    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (\n",
    "                lr, reg, train_accuracy, val_accuracy))\n",
    "    \n",
    "print('best validation accuracy achieved during cross-validation: %f' % best_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# evaluate on test set\n",
    "# Evaluate the best softmax on test set\n",
    "y_test_pred = best_softmax.predict(X_test)\n",
    "test_accuracy = np.mean(y_test == y_test_pred)\n",
    "print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Visualize the learned weights for each class\n",
    "w = best_softmax.W[:-1,:] # strip out the bias\n",
    "w = w.reshape(32, 32, 3, 10)\n",
    "\n",
    "w_min, w_max = np.min(w), np.max(w)\n",
    "\n",
    "classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n",
    "for i in range(10):\n",
    "    plt.subplot(2, 5, i + 1)\n",
    "    \n",
    "    # Rescale the weights to be between 0 and 255\n",
    "    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)\n",
    "    plt.imshow(wimg.astype('uint8'))\n",
    "    plt.axis('off')\n",
    "    plt.title(classes[i])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
